1 Figure 7

In this sub-vignette we present the analysis and source code for figure 7. This sub-vignette can be built along with all other sub-vignettes by running CLLCytokineScreen2021.Rmd.

1.1 Set up

Load libraries

library(patchwork)
library(maxstat)
library(survminer)
library(survival)
library(ggpubr)
library(rlang)
library(ggbeeswarm)
library(dplyr)

Set plot directory

plotDir = ifelse(exists(".standalone"), "", "../../inst/figs/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
## Warning in dir.create(plotDir): '..\..\inst\figs' existiert bereits

1.2 Load data

Raw

#ihc_patient_data: 596 x 7 tibble containing IHC staining intensities for pIRAK4, pSTAT6 and STAT6 staining in CLL and healthy tissue 
load( "../../data/ihc_patient_data.RData")

#ihc_surv_data: 200 x 15 tibble containing IHC staining intensities for pIRAK4, pSTAT6 and STAT6 staining in CLL and healthy tissue, along with meta data (including clinical outcomes) on each patient sample
load( "../../data/ihc_surv_data.RData")
#ihc_patient_data: 596 x 7 tibble containing IHC staining intensities forpIRAK4, pSTAT6 and STAT6 staining in CLL and healthy tissue 
ihc_patient_data <- read.table(file= "../../inst/extdata/ihc_patient_data.txt",header = TRUE) %>% as_tibble() %>% dplyr::group_by(PatientID)

#ihc_surv_data: 200 x 15 tibble containing IHC staining intensities for pIRAK4, pSTAT6 and STAT6 staining in CLL and healthy tissue, along with meta data (including clinical outcomes) on each patient sample
ihc_surv_data <- read.table(file= "../../inst/extdata/ihc_surv_data.txt",header = TRUE) %>% as_tibble() %>% dplyr::group_by(PatientID)

Process

#convert stain type to factor
ihc_patient_data$Stain <- factor(ihc_patient_data$Stain, levels = c("pSTAT6", "STAT6" , "pIRAK4"))

ihc_patient_data<-filter(ihc_patient_data, !is.na(Diagnosis))

1.3 Set aesthetics

source("../../R/themes_colors.R")

1.4 Plot Figures

1.4.1 Figure 7A

Beeswarm-boxplot of mean staining intensities of pSTAT6, for healthy and CLL LN samples

plotTab<-
  ihc_patient_data %>%
  #get staining intensities for pSTAT6 only
  dplyr::filter(Stain %in% c("pSTAT6"))
 
Fig7A <- 
  #plot staining intensity, stratified by CLL / Healthy LN
  ggplot(plotTab, aes(x = Tissue, y = Intensity)) +
  geom_boxplot() +
  geom_beeswarm(aes(color=Tissue), alpha=1, cex=2) +
  #add p values
  stat_compare_means(method = "t.test", comparisons=list(c(1,2)), size=6) +
  scale_color_manual(values = c(palreds[8], palblues[2])) +
  xlab("") +
  ylab("Mean pSTAT6 Staining Intensity") +
  guides(color = "none") +
  scale_x_discrete(labels = c( "CLL"="CLL-infiltrated \nlymph nodes", "LK"="Non-neoplastic \nlymph nodes")) +
  coord_cartesian(clip = "off") +
  #add preset theme 2
  t2

Fig7A

plotTab %>% 
  group_by(Tissue) %>% 
  count()
## # A tibble: 2 x 2
## # Groups:   Tissue [2]
##   Tissue     n
##   <chr>  <int>
## 1 CLL      100
## 2 LK        98

1.4.2 Figure 7B

Beeswarm-boxplot of mean staining intensities of pIRAK4, for healthy and CLL LN samples

Fig7B <-
  ihc_patient_data %>%
  #filter for pIRAK4 staining intensities only
  dplyr::filter(Stain%in%c("pIRAK4")) %>%
  #plot staining intensity, stratified by CLL / Healthy LN
  ggplot(aes(x=Tissue, y=Intensity)) +
  geom_boxplot() +
  geom_beeswarm(aes(color=Tissue), alpha=1, cex=2) +
  #add p values 
  stat_compare_means(method = "t.test", comparisons=list(c(1,2)), size=6) +
  scale_color_manual(values = c(palreds[8], palblues[2])) +
  xlab("") +
  ylab("Mean pIRAK4 Staining Intensity") +
  guides(color="none") +
  scale_x_discrete(labels = c( "CLL"="CLL-infiltrated \nlymph nodes", "LK"="Non-neoplastic \nlymph nodes")) +
  coord_cartesian(clip = "off") +
  #add preset theme 2
  t2



Fig7B

 ihc_patient_data %>%
  #filter for pIRAK4 staining intensities only
  dplyr::filter(Stain%in%c("pIRAK4"))  %>% 
  group_by(Diagnosis) %>% 
  count()
## # A tibble: 2 x 2
## # Groups:   Diagnosis [2]
##   Diagnosis     n
##   <chr>     <int>
## 1 CLL         100
## 2 healthy     100

1.4.3 Figure 7C

Image of IHC cross-section of CLL-infiltrated LN stained for pSTAT6

Fig7C <-cowplot::ggdraw() +
  cowplot::draw_image( "../../inst/images/CLL1_I3_pSTAT6.png", scale = 1)

Fig7C

1.4.4 Figure 7D

Image of IHC cross-section of healthy LN stained for pSTAT6

##read image
Fig7D <- cowplot::ggdraw() + 
  cowplot::draw_image("../../inst/images/LK2_B4_pSTAT6.png", scale = 1)

Fig7D

1.4.5 Figure 7E

Image of IHC cross-section of CLL-infiltrated LN stained for pIRAK4

Fig7E <- cowplot::ggdraw() + 
  cowplot::draw_image( "../../inst/images/CLL1_I3_pIRAK4.png", scale = 1)

Fig7E

1.4.6 Figure 7F

Image of IHC cross-section of healthy LN stained for pIRAK4

Fig7F <- cowplot::ggdraw() + 
  cowplot::draw_image( "../../inst/images/LK2_B4_pIRAK4.png", scale = 1)

Fig7F

1.4.7 Figure 7G

Kaplan-Meier curve to show associations of pSTAT6 levels with treatment free survival.
In Figures 7G and 7H the two pSTAT6 and pIRAK4 groups (high /low) were defined by mean staining intensities dichotomised using maximally selected rank statistics. The same 64 CLL lymph node samples were used for both Kaplan-Meier plots. 50 patient samples were in the high pSTAT6 group, and 52 in the high pIRAK4 group.

Prework for 7G and H

#Define stains for which want to visualize TTT
stains <- c("pSTAT6", "pIRAK4")


#Get optimal cut offs 
stats <- lapply(stains, function(stn){
 
  survival <- dplyr::select(ihc_surv_data, PatientID, Tissue, Diagnosis, Sex, TTT, treatedAfter, stn)
  colnames(survival) <- c("PatientID", "Tissue", "Diagnosis", "Sex", "TTT", "treatedAfter", "target")
  
  #Run test to obtain cut off threshold for high pSTAT6 versus low pSTAT6

  maxtest <- maxstat.test(Surv(TTT, treatedAfter)~ target, 
                          data = survival,
                          smethod = "LogRank",
                          alpha = NULL)
  
  cutpoint <- maxtest$estimate
  
 })
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(stn)` instead of `stn` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
  names(stats) <- c("pSTAT6", "pIRAK4")
  
                                          
  #Annotate by cutoff point
  survdf <- mutate(ihc_surv_data, 
                   phosphoSTAT6 = ifelse(pSTAT6 < stats$pSTAT6, "low", "high"), 
                   phosphoIRAK4 = ifelse(pIRAK4 < stats$pIRAK4, "low", "high"))
  
                                      

  #fit survival models
  f_pstat6 <- survfit(Surv(TTT, treatedAfter) ~ phosphoSTAT6, data= survdf)
  f_pirak4 <- survfit(Surv(TTT, treatedAfter) ~ phosphoIRAK4, data= survdf)
  
  fits <- list(pSTAT6 = f_pstat6, pIRAK4 = f_pirak4)
  
  #Make plot
  gg = ggsurvplot_list(fits, 
                       survdf,  
                       pval=TRUE, 
                       palette=c(palreds[8],palreds[3]),  
                       risk.table = TRUE, legend.title = "", 
                       ggtheme = t2, 
                       legend.labs =list(c("High", "Low"),c("High", "Low")),  
                       xlab="Time in years", ylab="\nTime to next treatment (probability)", title= "", legend = "bottom")
#get plot for pSTAT6 stain
 Fig7G <-
  wrap_elements(gg$pSTAT6$plot + 
                  theme(axis.title.x = element_blank()) +
                  gg$pSTAT6$table + 
                  theme(plot.title = element_blank()) +
                  plot_layout(ncol = 1, heights = c(85, 15)))
  
Fig7G

1.4.8 Figure 7H

Kaplan-Meier curve to show associations of pSTAT6 levels with treatment free survival.

#get plot for pIRAK4 stain 
Fig7H <- wrap_elements(gg$pIRAK4$plot+ 
                         theme(axis.title.x = element_blank()) +
                         gg$pIRAK4$table + 
                         theme(plot.title = element_blank()) +
                  plot_layout(ncol=1, heights = c(85, 15))) 


Fig7H

1.5 Assemble Figure

design1 <-"
  ACG
  ADG
  BEH
  BFH
  "
tp <- theme(plot.tag = element_text(size = 30, vjust = 1, face="plain"))

Fig7 <-
  wrap_elements(Fig7A) + tp +
  wrap_elements(Fig7B) + tp +
  Fig7C + tp +
  Fig7D + tp +
  Fig7E + tp +
  Fig7F + tp +
  
  Fig7G + tp +
  
  Fig7H + tp +
  
  plot_layout(design = design1, widths = c(1,0.7,1), heights =c(0.8, 1, 0.8, 1))+
  plot_annotation(tag_levels = "A", title="Figure 7", theme = theme(title=element_text(size = 20)))
  
Fig7

1.6 Count tables

ihc_surv_data %>% 
  ungroup() %>% 
  mutate(TFT = ifelse(is.na(TFT)|TFT == "NA", "NA", "Available" )) %>% 
  dplyr::group_by(Diagnosis, TFT) %>% 
  count()
## # A tibble: 3 x 3
## # Groups:   Diagnosis, TFT [3]
##   Diagnosis TFT           n
##   <chr>     <chr>     <int>
## 1 CLL       Available    64
## 2 CLL       NA           36
## 3 healthy   NA          100

1.7 Appendix

Sys.info()
##           sysname           release           version          nodename 
##         "Windows"          "10 x64"     "build 19044" "DESKTOP-NR9H6QL" 
##           machine             login              user    effective_user 
##          "x86-64"     "Peter Bruch"     "Peter Bruch"     "Peter Bruch"
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=German_Germany.1252  LC_CTYPE=German_Germany.1252   
## [3] LC_MONETARY=German_Germany.1252 LC_NUMERIC=C                   
## [5] LC_TIME=German_Germany.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] dplyr_1.0.8      ggbeeswarm_0.6.0 rlang_1.0.1      survival_3.2-13 
##  [5] survminer_0.4.9  ggpubr_0.4.0     ggplot2_3.3.5    maxstat_0.7-25  
##  [9] patchwork_1.1.1  BiocStyle_2.22.0
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.8            mvtnorm_1.1-3         lattice_0.20-45      
##  [4] tidyr_1.2.0           zoo_1.8-9             assertthat_0.2.1     
##  [7] digest_0.6.29         utf8_1.2.2            R6_2.5.1             
## [10] backports_1.4.1       evaluate_0.15         highr_0.9            
## [13] pillar_1.7.0          rstudioapi_0.13       data.table_1.14.2    
## [16] car_3.0-12            jquerylib_0.1.4       magick_2.7.3         
## [19] Matrix_1.3-4          rmarkdown_2.13        labeling_0.4.2       
## [22] splines_4.1.2         stringr_1.4.0         gridtext_0.1.4       
## [25] munsell_0.5.0         broom_0.8.0           vipor_0.4.5          
## [28] compiler_4.1.2        xfun_0.29             pkgconfig_2.0.3      
## [31] htmltools_0.5.2       ggtext_0.1.1          tidyselect_1.1.2     
## [34] tibble_3.1.6          gridExtra_2.3         km.ci_0.5-6          
## [37] bookdown_0.25         fansi_1.0.2           crayon_1.5.1         
## [40] withr_2.5.0           grid_4.1.2            jsonlite_1.8.0       
## [43] xtable_1.8-4          gtable_0.3.0          lifecycle_1.0.1      
## [46] DBI_1.1.2             magrittr_2.0.2        KMsurv_0.1-5         
## [49] scales_1.2.0          cli_3.2.0             stringi_1.7.6        
## [52] carData_3.0-5         farver_2.1.0          ggsignif_0.6.3       
## [55] xml2_1.3.3            bslib_0.3.1           ellipsis_0.3.2       
## [58] survMisc_0.5.6        generics_0.1.2        vctrs_0.3.8          
## [61] cowplot_1.1.1         tools_4.1.2           beeswarm_0.4.0       
## [64] glue_1.6.1            markdown_1.1          purrr_0.3.4          
## [67] abind_1.4-5           fastmap_1.1.0         yaml_2.3.5           
## [70] colorspace_2.0-3      BiocManager_1.30.16   rstatix_0.7.0        
## [73] knitr_1.38            sass_0.4.1            exactRankTests_0.8-34